/* Copyright 2013 Tobias Marschall
 * 
 * This file is part of CLEVER.
 * 
 * CLEVER is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CLEVER is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CLEVER.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>

#include <boost/program_options.hpp>
#include <boost/tokenizer.hpp>
#include <boost/lexical_cast.hpp>

#include "HistogramBasedDistribution.h"
#include "VersionInfo.h"

using namespace std;
using namespace boost;
namespace po = boost::program_options;

void usage(const char* name, const po::options_description& options_desc) {
	cerr << "Usage: " << name << " [options] <read-group-file>" << endl;
	cerr << endl;
	cerr << "Prints mean and standard deviation of the internal segment size distribution of each read group." << endl;
	cerr << "The last line gives min/max mean and stddev over all read groups." << endl;
	cerr << endl;
	cerr << "<read-group-file>: file containing two columns <read-group> <distribution-file>," << endl;
	cerr << "                   where <distribution-file> contains the internal segment size" << endl;
	cerr << endl;
	cerr << options_desc << endl;
	exit(1);
}

typedef struct read_group_stats_t {
	string name;
	double mean;
	double stddev;
	read_group_stats_t(string name, double mean, double stddev) : name(name), mean(mean), stddev(stddev) {}
} read_group_stats_t;

auto_ptr<vector<read_group_stats_t> > read_readgroup_list(const string& filename) {
	auto_ptr<vector<read_group_stats_t> > result(new vector<read_group_stats_t>());
	ifstream is(filename.c_str());
	if (is.fail()) {
		cerr << "Error: could not open \"" << filename << "\"." << endl;
		exit(1);
	}
	typedef boost::tokenizer<boost::char_separator<char> > tokenizer_t;
	boost::char_separator<char> whitespace_separator(" \t");
	string line;
	int linenr = 0;
	while (getline(is,line)) {
		linenr += 1;
		tokenizer_t tokenizer(line, whitespace_separator);
		vector<string> tokens(tokenizer.begin(), tokenizer.end());
		if (tokens.size() != 2) {
			ostringstream oss;
			oss << "Error parsing read group list. Offending line: " << linenr;
			throw std::runtime_error(oss.str());
		}
		HistogramBasedDistribution distribution(tokens[1]);
		double mean;
		double stddev;
		distribution.estimateGaussian(&mean, &stddev);
		result->push_back(read_group_stats_t(tokens[0], mean, stddev));
	}
	return result;
}

int main(int argc, char* argv[]) {
	VersionInfo::checkAndPrintVersion("read-group-stats", cerr);
	string commandline = VersionInfo::commandline(argc, argv);
	// PARAMETERS

	po::options_description options_desc("Allowed options");
// 	options_desc.add_options()
// 	    ("verbose,v", po::value<bool>(&verbose)->zero_tokens(), "Be verbose: output additional statistics for each variation.")
// 	;
	
	if (argc<2) {
		usage(argv[0], options_desc);
	}
	string distribution_filename(argv[argc-1]);
	argc -= 1;

	auto_ptr<vector<read_group_stats_t> > readgroup_params(0);
	try {
		readgroup_params = read_readgroup_list(distribution_filename);
	} catch (std::runtime_error& e) {
		cerr << "Error: " << e.what() << endl;
		return 1;
	}
	cerr << "Commandline: " << commandline << endl;
	double min_mean = numeric_limits<double>::infinity();
	double min_stddev = numeric_limits<double>::infinity();
	double max_mean = -numeric_limits<double>::infinity();
	double max_stddev = -numeric_limits<double>::infinity();
	for (size_t i=0; i<readgroup_params->size(); ++i) {
		const read_group_stats_t& s = readgroup_params->at(i);
		cout << s.name << ' ' << s.mean << ' ' << s.stddev << endl;
		if (s.mean < min_mean) min_mean = s.mean;
		if (s.mean > max_mean) max_mean = s.mean;
		if (s.stddev < min_stddev) min_stddev = s.stddev;
		if (s.stddev > max_stddev) max_stddev = s.stddev;
	}
	cout << "> " << min_mean << ' ' << max_mean << ' ' << min_stddev << ' ' << max_stddev << endl;
	return 0;
}
